Doing tests on the full dataset 1. Make new file with full dataset like for Palgen workflow 2. Make graphs with median of each donor for all time points - CD66, CD45, etc. 3. Model a spline regression for each marker on all timepoints
Load packages.
library("CytoGLMM")## Warning: replacing previous import 'MASS::select' by 'dplyr::select' when
## loading 'CytoGLMM'
## Warning: replacing previous import 'cowplot::ggsave' by 'ggplot2::ggsave'
## when loading 'CytoGLMM'
## Warning: replacing previous import 'stringr::boundary' by
## 'strucchange::boundary' when loading 'CytoGLMM'
library("tidyverse")
library("magrittr")
library("FlowRepositoryR")
library("flowCore")
library("openCyto")
library("ggcyto")
library("scales")
library("parallel")
library("RColorBrewer")
library("ggcorrplot")
library("SummarizedExperiment")
library("lme4")
library("lmerTest")
library(mgcv)# setting computational resources
ncores = parallel::detectCores()After downloading the data, a sample table is created by parsing the fcs filenames.
fcs_filesVA2 = list.files(path = "FlowRepository_FR-FCM-ZYPY_files", pattern = "fcs")
#Make new column with three levels related to term
map_term = function(x) {
if (str_detect(x, "BPD019H00|PPD000H00")) "Before-prime"
else if (str_detect(x, "PPD000H03|PPD000H06|PPD001H00|PPD003H00|PPD014H00|PBD000H00")) "Post-prime"
else if (str_detect(x, "PBD000H03|PBD000H06|PBD001H00|PBD003H00")) "Post-boost"
else NA
}
sample_tableVA2 = tibble(
donor = str_extract(fcs_filesVA2, "_B[B-D]{1}..."),
term = str_extract(fcs_filesVA2, "..D0..H0.") %>% as.factor,
grouped_term = sapply(fcs_filesVA2, map_term) %>% as.factor,
file_name = paste0("FlowRepository_FR-FCM-ZYPY_files/",fcs_filesVA2)
)
sample_tableVA2$donor = gsub("_", "", sample_tableVA2$donor)
sample_tableVA2## # A tibble: 43 x 4
## donor term grouped_term file_name
## <chr> <fct> <fct> <chr>
## 1 BB078 BPD019H00 Before-prime FlowRepository_FR-FCM-ZYPY_files/BPD019H00…
## 2 BB231 BPD019H00 Before-prime FlowRepository_FR-FCM-ZYPY_files/BPD019H00…
## 3 BC641 BPD019H00 Before-prime FlowRepository_FR-FCM-ZYPY_files/BPD019H00…
## 4 BD620 BPD019H00 Before-prime FlowRepository_FR-FCM-ZYPY_files/BPD019H00…
## 5 BB078 PBD000H00 Post-prime FlowRepository_FR-FCM-ZYPY_files/PBD000H00…
## 6 BB231 PBD000H00 Post-prime FlowRepository_FR-FCM-ZYPY_files/PBD000H00…
## 7 BD620 PBD000H00 Post-prime FlowRepository_FR-FCM-ZYPY_files/PBD000H00…
## 8 BB078 PBD000H03 Post-boost FlowRepository_FR-FCM-ZYPY_files/PBD000H03…
## 9 BB231 PBD000H03 Post-boost FlowRepository_FR-FCM-ZYPY_files/PBD000H03…
## 10 BC641 PBD000H03 Post-boost FlowRepository_FR-FCM-ZYPY_files/PBD000H03…
## # … with 33 more rows
A marker table is created by extracting the marker isotopes and protein names. A third column is added to determine whether the markers were used for identifying cells (phenotype), are functional proteins, or were unused. Palgen et al. (2019) state in their paper that due to low reactivity multiple markers needed to be excluded from the analysis.
# names of markers used for gating extracted from the paper
map_type = function(x) {
if (str_detect(x, paste(c("CD66", "HLADR", "CD3", "CD107a", "CD8", "CD45", "GranzymeB", "CD56", "CD62L", "CD4", "CD11a", "CD2", "CD7", "NKG2D", "CD11c", "CD69", "CD25", "CD16", "CCR5", "CXCR4", "CD14", "perforine", "NKG2A", "CD20", "CCR7"),collapse = '|'))) "phenotype"
else if (str_detect(x, paste(c("Di", "Time", "Cell_length", "cells", "File Number"),collapse = '|'))) "unused"
else "function"
}
#Creating marker table
y <- sample_tableVA2$file_name[1]
fcs = read.FCS(y, transformation = FALSE)
isotope = pData(parameters(fcs))[,"name"]
protein_name = pData(parameters(fcs))[,"desc"]
type = sapply(protein_name, map_type)
tb_marker = tibble(isotope, protein_name, type)
tb_marker## # A tibble: 45 x 3
## isotope protein_name type
## <I<chr>> <I<chr>> <chr>
## 1 Time Time unused
## 2 Cell_length Cell_length unused
## 3 (Rh103)Di (Rh103)Di unused
## 4 (Xe131)Di (Xe131)Di unused
## 5 (Cs133)Di (Cs133)Di unused
## 6 (La139)Di (La139)Di unused
## 7 (Ce140)Di (Ce140)Di unused
## 8 (Pr141)Di CD66 phenotype
## 9 (Nd142)Di HLADR phenotype
## 10 (Nd143)Di CD3 phenotype
## # … with 35 more rows
#deleting before last two rows, 43 & 44 as their protein_name was 'cells'
tb_marker <- tb_marker[-c(43,44), ]
tb_marker## # A tibble: 43 x 3
## isotope protein_name type
## <I<chr>> <I<chr>> <chr>
## 1 Time Time unused
## 2 Cell_length Cell_length unused
## 3 (Rh103)Di (Rh103)Di unused
## 4 (Xe131)Di (Xe131)Di unused
## 5 (Cs133)Di (Cs133)Di unused
## 6 (La139)Di (La139)Di unused
## 7 (Ce140)Di (Ce140)Di unused
## 8 (Pr141)Di CD66 phenotype
## 9 (Nd142)Di HLADR phenotype
## 10 (Nd143)Di CD3 phenotype
## # … with 33 more rows
As claimed by the authors (Palgen et al., 2019) the data has been normalized and gated. The cells contained are assumed to be natural killer (NK) cells as the paper only focuses on NK cells.
# load data
fset2 = read.ncdfFlowSet(sample_tableVA2$file_name, mc.cores = 2)
pData(fset2) = cbind(pData(fset2),sample_tableVA2)
df_samples2 = lapply(seq(fset2), function(sample_id) {
marker_ids = which(fset2@colnames %in% tb_marker$isotope)
exprs = as_tibble(exprs(fset2[[sample_id]]))[,marker_ids]
file_name = pData(fset2[sample_id])$file_name
exprs %>% add_column(file_name)
}) %>% bind_rows
str(df_samples2)## Classes 'tbl_df', 'tbl' and 'data.frame': 7262238 obs. of 44 variables:
## $ Time : num 14 50 74 121 122 180 191 196 211 242 ...
## $ Cell_length: num 20 34 18 21 22 19 28 18 29 20 ...
## $ (Rh103)Di : num -0.107 -0.814 -0.608 -0.799 -0.655 ...
## $ (Xe131)Di : num -0.533 -0.3947 -0.6889 -0.4583 -0.0429 ...
## $ (Cs133)Di : num -0.75592 -0.73083 -0.00585 -0.38208 -0.18158 ...
## $ (La139)Di : num 1.866 0.689 -0.129 -0.136 -0.23 ...
## $ (Ce140)Di : num -0.0534 -0.0871 -0.1385 0.7735 -0.4707 ...
## $ (Pr141)Di : num 27.112 44.846 29.033 2.055 0.689 ...
## $ (Nd142)Di : num 6.687 66.422 -0.378 7.603 -0.722 ...
## $ (Nd143)Di : num 2.87 4.01 2.16 1.88 14 ...
## $ (Nd144)Di : num 0.356 80.026 -0.357 69.208 7.077 ...
## $ (Nd145)Di : num -0.51 4.095 -0.286 4.316 0.012 ...
## $ (Nd146)Di : num 14.77 5.04 7.51 2.72 10.73 ...
## $ (Sm147)Di : num -0.317 1.608 1.358 -0.53 3.5 ...
## $ (Nd148)Di : num 0.257 5.051 1.246 0.83 0.429 ...
## $ (Sm149)Di : num -0.117 35.481 -0.848 4.763 2.298 ...
## $ (Nd150)Di : num 2.2454 4.1198 0.7394 1.9436 -0.0443 ...
## $ (Eu151)Di : num -0.95 -0.313 -0.64 -0.655 -0.898 ...
## $ (Sm152)Di : num -0.512 5.703 0.813 2.899 4.556 ...
## $ (Eu153)Di : num 12.2 18.39 5.6 134.34 3.82 ...
## $ (Sm154)Di : num 10.77 -0.252 -0.55 1.098 42.318 ...
## $ (Gd155)Di : num -0.954 -0.265 0.108 -0.29 11.992 ...
## $ (Gd156)Di : num -0.84 -0.153 0.767 -0.428 -0.232 ...
## $ (Gd158)Di : num -0.3801 11.2318 -0.7972 -0.0977 -0.8671 ...
## $ (Tb159)Di : num -0.909 0.192 2.478 -0.414 -0.341 ...
## $ (Gd160)Di : num -0.179 1.671 -0.468 -0.484 -0.649 ...
## $ (Dy161)Di : num 1.231 0.455 2.82 0.308 -0.717 ...
## $ (Dy162)Di : num 10.425 0.971 3.853 36.949 -0.747 ...
## $ (Dy163)Di : num -0.3526 2.1858 1.8362 -0.2632 -0.0234 ...
## $ (Dy164)Di : num 1.991 4.39 1.224 0.611 -0.828 ...
## $ (Ho165)Di : num 0.241 1.261 0.587 -0.884 -0.879 ...
## $ (Er166)Di : num 10.22 19.1 3.66 4.08 4.82 ...
## $ (Er167)Di : num 2.834 10.3 4.401 35.658 0.948 ...
## $ (Er168)Di : num 4.74 23.66 1.67 2.37 9.24 ...
## $ (Tm169)Di : num 0.85 14.67 3.41 4.19 1.01 ...
## $ (Er170)Di : num 3.6009 24.8262 0.0821 -0.4231 -0.5585 ...
## $ (Yb171)Di : num 2.072 7.521 0.244 3.787 1.558 ...
## $ (Yb172)Di : num -0.501 3.983 0.436 9.772 -0.371 ...
## $ (Yb173)Di : num -0.575 -0.293 -0.654 0.223 -0.303 ...
## $ (Yb174)Di : num 13.2755 0.4976 5.2598 0.0817 0.5372 ...
## $ (Lu175)Di : num 6.017 17.21 6.324 0.391 2.42 ...
## $ (Lu176)Di : num 1.68 4.63 1.91 1.85 2.05 ...
## $ FileNum : num 0.961 1.088 0.958 0.91 0.991 ...
## $ file_name : chr "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" ...
df_samples2 %<>% inner_join(sample_tableVA2,by = "file_name")
oldnames = tb_marker$isotope
newnames = tb_marker$protein_name
df_samples2 %<>% rename_at(vars(oldnames), ~ newnames)
str(df_samples2)## Classes 'tbl_df', 'tbl' and 'data.frame': 7262238 obs. of 47 variables:
## $ Time : num 14 50 74 121 122 180 191 196 211 242 ...
## $ Cell_length : num 20 34 18 21 22 19 28 18 29 20 ...
## $ (Rh103)Di : num -0.107 -0.814 -0.608 -0.799 -0.655 ...
## $ (Xe131)Di : num -0.533 -0.3947 -0.6889 -0.4583 -0.0429 ...
## $ (Cs133)Di : num -0.75592 -0.73083 -0.00585 -0.38208 -0.18158 ...
## $ (La139)Di : num 1.866 0.689 -0.129 -0.136 -0.23 ...
## $ (Ce140)Di : num -0.0534 -0.0871 -0.1385 0.7735 -0.4707 ...
## $ CD66 : num 27.112 44.846 29.033 2.055 0.689 ...
## $ HLADR : num 6.687 66.422 -0.378 7.603 -0.722 ...
## $ CD3 : num 2.87 4.01 2.16 1.88 14 ...
## $ CD107a : num 0.356 80.026 -0.357 69.208 7.077 ...
## $ CD8 : num -0.51 4.095 -0.286 4.316 0.012 ...
## $ CD45 : num 14.77 5.04 7.51 2.72 10.73 ...
## $ IL4 : num -0.317 1.608 1.358 -0.53 3.5 ...
## $ GranzymeB : num 0.257 5.051 1.246 0.83 0.429 ...
## $ CD56 : num -0.117 35.481 -0.848 4.763 2.298 ...
## $ CD62L : num 2.2454 4.1198 0.7394 1.9436 -0.0443 ...
## $ (Eu151)Di : num -0.95 -0.313 -0.64 -0.655 -0.898 ...
## $ CD4 : num -0.512 5.703 0.813 2.899 4.556 ...
## $ CD11a : num 12.2 18.39 5.6 134.34 3.82 ...
## $ CD2 : num 10.77 -0.252 -0.55 1.098 42.318 ...
## $ CD7 : num -0.954 -0.265 0.108 -0.29 11.992 ...
## $ MIP1B : num -0.84 -0.153 0.767 -0.428 -0.232 ...
## $ (Gd158)Di : num -0.3801 11.2318 -0.7972 -0.0977 -0.8671 ...
## $ TNFa : num -0.909 0.192 2.478 -0.414 -0.341 ...
## $ Ki67 : num -0.179 1.671 -0.468 -0.484 -0.649 ...
## $ NKG2D : num 1.231 0.455 2.82 0.308 -0.717 ...
## $ CD11c : num 10.425 0.971 3.853 36.949 -0.747 ...
## $ (Dy163)Di : num -0.3526 2.1858 1.8362 -0.2632 -0.0234 ...
## $ CD69 : num 1.991 4.39 1.224 0.611 -0.828 ...
## $ IFNg : num 0.241 1.261 0.587 -0.884 -0.879 ...
## $ CD25 : num 10.22 19.1 3.66 4.08 4.82 ...
## $ CD16 : num 2.834 10.3 4.401 35.658 0.948 ...
## $ CCR5 : num 4.74 23.66 1.67 2.37 9.24 ...
## $ CXCR4 : num 0.85 14.67 3.41 4.19 1.01 ...
## $ CD14 : num 3.6009 24.8262 0.0821 -0.4231 -0.5585 ...
## $ perforine : num 2.072 7.521 0.244 3.787 1.558 ...
## $ NKG2A : num -0.501 3.983 0.436 9.772 -0.371 ...
## $ (Yb173)Di : num -0.575 -0.293 -0.654 0.223 -0.303 ...
## $ CD20 : num 13.2755 0.4976 5.2598 0.0817 0.5372 ...
## $ CCR7 : num 6.017 17.21 6.324 0.391 2.42 ...
## $ IL10 : num 1.68 4.63 1.91 1.85 2.05 ...
## $ File Number : num 0.961 1.088 0.958 0.91 0.991 ...
## $ file_name : chr "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" "FlowRepository_FR-FCM-ZYPY_files/BPD019H00_BB078.fcs" ...
## $ donor : chr "BB078" "BB078" "BB078" "BB078" ...
## $ term : Factor w/ 12 levels "BPD019H00","PBD000H00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ grouped_term: Factor w/ 3 levels "Before-prime",..: 1 1 1 1 1 1 1 1 1 1 ...
#factor
df_samples2$term %<>% factor(levels = c("BPD019H00", "PPD000H00", "PPD000H03", "PPD000H06", "PPD001H00", "PPD003H00", "PPD014H00", "PBD000H00", "PBD000H03", "PBD000H06", "PBD001H00", "PBD003H00"))
df_samples2$grouped_term %<>% factor(levels = c("Before-prime", "Post-prime", "Post-boost"))Cell counts listed per donor and condition
table(df_samples2$donor,df_samples2$term)##
## BPD019H00 PPD000H00 PPD000H03 PPD000H06 PPD001H00 PPD003H00
## BB078 115117 74607 178094 163789 170962 123565
## BB231 84527 93135 153506 228105 220444 123631
## BC641 101816 0 135239 172985 150975 127794
## BD620 67316 67358 120086 93624 135219 0
##
## PPD014H00 PBD000H00 PBD000H03 PBD000H06 PBD001H00 PBD003H00
## BB078 188553 170024 243760 206027 299428 106990
## BB231 155249 222997 310251 330010 282710 191831
## BC641 192715 0 96078 264225 359989 0
## BD620 123768 149477 0 171421 151180 143661
#all used proteins
protein_names_all = tb_marker %>%
dplyr::filter(type != "unused") %>%
.$protein_name
protein_names_all## [1] "CD66" "HLADR" "CD3" "CD107a" "CD8"
## [6] "CD45" "IL4" "GranzymeB" "CD56" "CD62L"
## [11] "CD4" "CD11a" "CD2" "CD7" "MIP1B"
## [16] "TNFa" "Ki67" "NKG2D" "CD11c" "CD69"
## [21] "IFNg" "CD25" "CD16" "CCR5" "CXCR4"
## [26] "CD14" "perforine" "NKG2A" "CD20" "CCR7"
## [31] "IL10"
#declare columns that are not protein markers
sample_info_names = c(names(sample_tableVA2))
#transform
trans_func = function(x) asinh(x/5)
df_samples_tfm2 = df_samples2 %>% mutate_at(protein_names_all, trans_func)Subsample data, as models on full data are computationally intensive and models are very similar.
ncells = 1000
if(nrow(df_samples_tfm2) > ncells) {
print(paste("subsampled to",ncells,"per donor"))
set.seed(2019)
# subsample depending on max cell count
df_count = df_samples_tfm2 %>% group_by(donor) %>% tally() %>%
mutate(nnew = ifelse(n > ncells,ncells,n))
# create table with a data frame in one column
df_nested = df_samples_tfm2 %>% group_by(donor) %>% nest() %>%
left_join(df_count,by = "donor")
# subsample per donor
df_samples_tfm2 = df_nested %>%
mutate(samp = map2(data, nnew, sample_n)) %>%
dplyr::select(donor, samp) %>%
unnest()
} else {
print("no subsampling done")
}## [1] "subsampled to 1000 per donor"
Plot all celltypes.
#defining numeric term for spline regression
df_samples_tfm2$numeric_term = df_samples_tfm2$term
df_samples_tfm2$numeric_term = as.numeric(df_samples_tfm2$numeric_term)
df_samples_tfm2## # A tibble: 4,000 x 48
## donor Time Cell_length `(Rh103)Di` `(Xe131)Di` `(Cs133)Di` `(La139)Di`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 BB078 50681 24 -0.815 -0.460 -0.455 -0.891
## 2 BB078 278371 27 -0.277 4.08 -0.338 2.19
## 3 BB078 419519 21 -0.211 -0.632 -0.0601 2.54
## 4 BB078 160818 22 -0.314 -0.414 -0.555 0.00716
## 5 BB078 951768 23 -0.122 0.127 -0.0156 1.56
## 6 BB078 765817 32 -0.0401 -0.456 -0.268 -0.462
## 7 BB078 703272 20 -0.427 -0.0764 -0.0495 0.0739
## 8 BB078 111716 20 -0.744 -0.0739 -0.687 -0.106
## 9 BB078 630364 21 -0.742 1.39 -0.120 4.57
## 10 BB078 88940 21 -0.360 -0.518 -0.221 1.04
## # … with 3,990 more rows, and 41 more variables: `(Ce140)Di` <dbl>,
## # CD66 <dbl>, HLADR <dbl>, CD3 <dbl>, CD107a <dbl>, CD8 <dbl>,
## # CD45 <dbl>, IL4 <dbl>, GranzymeB <dbl>, CD56 <dbl>, CD62L <dbl>,
## # `(Eu151)Di` <dbl>, CD4 <dbl>, CD11a <dbl>, CD2 <dbl>, CD7 <dbl>,
## # MIP1B <dbl>, `(Gd158)Di` <dbl>, TNFa <dbl>, Ki67 <dbl>, NKG2D <dbl>,
## # CD11c <dbl>, `(Dy163)Di` <dbl>, CD69 <dbl>, IFNg <dbl>, CD25 <dbl>,
## # CD16 <dbl>, CCR5 <dbl>, CXCR4 <dbl>, CD14 <dbl>, perforine <dbl>,
## # NKG2A <dbl>, `(Yb173)Di` <dbl>, CD20 <dbl>, CCR7 <dbl>, IL10 <dbl>,
## # `File Number` <dbl>, file_name <chr>, term <fct>, grouped_term <fct>,
## # numeric_term <dbl>
#all proteins
df_median_fct2 = df_samples_tfm2 %>%
group_by(file_name, donor, term, grouped_term, numeric_term) %>%
summarise_at(protein_names_all, median)
df_median_fct2## # A tibble: 43 x 36
## # Groups: file_name, donor, term, grouped_term [43]
## file_name donor term grouped_term numeric_term CD66 HLADR CD3
## <chr> <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 FlowRepo… BB078 BPD0… Before-prime 1 2.67 0.202 0.604
## 2 FlowRepo… BB231 BPD0… Before-prime 1 2.64 0.147 0.354
## 3 FlowRepo… BC641 BPD0… Before-prime 1 0.915 0.0607 0.534
## 4 FlowRepo… BD620 BPD0… Before-prime 1 2.93 0.531 0.742
## 5 FlowRepo… BB078 PBD0… Post-prime 8 4.07 0.316 0.344
## 6 FlowRepo… BB231 PBD0… Post-prime 8 3.75 0.194 0.206
## 7 FlowRepo… BD620 PBD0… Post-prime 8 3.49 0.653 0.549
## 8 FlowRepo… BB078 PBD0… Post-boost 9 3.90 0.268 0.306
## 9 FlowRepo… BB231 PBD0… Post-boost 9 3.64 0.159 0.125
## 10 FlowRepo… BC641 PBD0… Post-boost 9 3.42 0.536 0.572
## # … with 33 more rows, and 28 more variables: CD107a <dbl>, CD8 <dbl>,
## # CD45 <dbl>, IL4 <dbl>, GranzymeB <dbl>, CD56 <dbl>, CD62L <dbl>,
## # CD4 <dbl>, CD11a <dbl>, CD2 <dbl>, CD7 <dbl>, MIP1B <dbl>, TNFa <dbl>,
## # Ki67 <dbl>, NKG2D <dbl>, CD11c <dbl>, CD69 <dbl>, IFNg <dbl>,
## # CD25 <dbl>, CD16 <dbl>, CCR5 <dbl>, CXCR4 <dbl>, CD14 <dbl>,
## # perforine <dbl>, NKG2A <dbl>, CD20 <dbl>, CCR7 <dbl>, IL10 <dbl>
Make combined table to plot everything on same diagram (median markers & spline)
#convert term into timepoints in hours (& into rank)
df_combined2 = df_samples_tfm2 %>% mutate(
term_hours = case_when(term == "BPD019H00" ~ 0,
term == "PPD000H00" ~ 456,
term == "PPD000H03" ~ 459,
term == "PPD000H06" ~ 462,
term == "PPD001H00" ~ 480,
term == "PPD003H00" ~ 528,
term == "PPD014H00" ~ 792,
term == "PBD000H00" ~ 1848,
term == "PBD000H03" ~ 1851,
term == "PBD000H06" ~ 1854,
term == "PBD001H00" ~ 1872,
term == "PBD003H00" ~ 1920)
)
#median marker plotting
ggplot(df_combined2, aes(term_hours, CD66)) +
geom_point() + geom_smooth(method = gam) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#spline regression model & x axis values
model <- gam(CD66 ~ s(numeric_term), data = df_samples_tfm2)
lablist = c("BPD019H00", "PPD000H00", "PPD000H03", "PPD000H06", "PPD001H00", "PPD003H00", "PPD014H00", "PBD000H00", "PBD000H03", "PBD000H06", "PBD001H00", "PBD003H00")
#base R combined plots - make look prettier
plot(df_median_fct2$numeric_term, df_median_fct2$CD66, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD66 for all timepoints",
xlab = "time", ylab = "CD66", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.2, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
plot(df_samples_tfm2$numeric_term, df_samples_tfm2$CD66)
lines(smooth.spline(df_samples_tfm2$numeric_term, df_samples_tfm2$CD66))plot(df_median_fct2$numeric_term, df_median_fct2$CD66, type = "p", col=df_median_fct2$grouped_term)
lines(smooth.spline(df_samples_tfm2$numeric_term, df_samples_tfm2$CD66))#lines a bit choppier, each x point is a knot, doesn't change y axis
plot(model)
lines(smooth.spline(df_samples_tfm2$numeric_term, df_samples_tfm2$CD66)) Comparing different types of splines. smooth.spline is a linear more basic spline function. gam is a generalized linear model that corrects for more.
Zoom in on marker CD66
#median marker plotting
ggplot(df_median_fct2, aes(term, CD66, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CD66, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(CD66 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD66 for all timepoints",
xlab = "term")#abline(v = 3, col = "green") - Error "plot.new has not been called yet"Prime seems to be similar to beginning of prime and then downards slope. Just before boost much higher, then downwards slope again within boost. Much more significant shift, y axis up to 4. ! Dip after prime may also be explained by total NK cell amounts going down authors said after vaccination. Really seems that each injection causes a spike in CD66 and it then slowly goes down,and at boost it’s a much steeper increase then for the prime. Numbers on y axis also much higher than others (up to 4). Interestingly, strong increase happens just before the boost injection. One of the markers where donors are the most alike, less spread.
Looking into CD45
ggplot(df_median_fct2, aes(term, CD45, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) + ggtitle("CD45 median marker expressions per donor")#spline
model <- gam(CD45 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD45 for all timepoints",
xlab = "term")#combined plot
model <- gam(CD45 ~ s(numeric_term), data = df_samples_tfm2)
plot(df_median_fct2$numeric_term, df_median_fct2$CD45, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD45 for all timepoints",
xlab = "time", ylab = "CD45", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.1, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
#simpler spline regression
plot(df_median_fct2$numeric_term, df_median_fct2$CD45, col=df_median_fct2$grouped_term)
lines(smooth.spline(df_samples_tfm2$numeric_term, df_samples_tfm2$CD45)) CD45 also seems to start off quite high before prime, then go down after prime vaccination, then before boost it is higher again and gently goes down after boost. Y axis goes up to 2. Prime doesn’t seem to stimulate CD45 much initially. Might be a marker that takes longer to take effect.
Looking further into CD56
ggplot(df_median_fct2, aes(term, CD56, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))ggplot(df_median_fct2, aes(term, CD56, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) + theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust=0))#spline regression
model <- gam(CD56 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD56 for all timepoints",
xlab = "term")#combined plot
model <- gam(CD56 ~ s(numeric_term), data = df_samples_tfm2)
plot(df_median_fct2$numeric_term, df_median_fct2$CD56, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD56 for all timepoints",
xlab = "time", ylab = "CD56", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.2, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
#simple spline
plot(df_median_fct2$numeric_term, df_median_fct2$CD56, type = "p", col=df_median_fct2$grouped_term)
lines(smooth.spline(df_samples_tfm2$numeric_term, df_samples_tfm2$CD56))General downwards slope from prime to boost. Large variation in prime amongst donors. Scale only goes up to 1.2 on y axis. Facetwrap interesting as see downards slope better, especially BB078 and BB231 as simply their baseline differs.
Looking into HLADR
ggplot(df_median_fct2, aes(term, HLADR, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))ggplot(df_median_fct2, aes(term, HLADR, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) + theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust=0))#spline regression
model <- gam(HLADR ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker HLADR for all timepoints",
xlab = "term") Seems to have slightly similar pattern to CD45, higher in boost than in prime in general, not strong downwards slopes. Y axis quite small ongy going to 0.6.
Looking into CD107a
ggplot(df_median_fct2, aes(term, CD107a, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))ggplot(df_median_fct2, aes(term, CD107a, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#spline regression
model <- gam(CD107a ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD107a for all timepoints",
xlab = "term") No clear pattern. Seems to be in general, per donor, higher beginning and at boost end, but inconsistent. Y axis is only up to 0.75.
Looking into perforin
model <- gam(perforine ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker perforin for all timepoints",
xlab = "term")ggplot(df_median_fct2, aes(term, perforine, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))ggplot(df_median_fct2, aes(term, perforine, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0))Look into CD16
#median marker plotting
ggplot(df_median_fct2, aes(term, CD16, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CD16, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(CD16 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD16 for all timepoints",
xlab = "term")model <- gam(CD16 ~ s(numeric_term), data = df_samples_tfm2)
plot(df_median_fct2$numeric_term, df_median_fct2$CD16, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD16 for all timepoints",
xlab = "time", ylab = "CD16", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.05, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
CD16 seems to clearly go up after the boost condition. The reason I did not find it is that the increase seems to be omst prominent one day after the boost. It is still very low 6 hours after the boost injection.
Zoom in on marker granzyme B
#median marker plotting
ggplot(df_median_fct2, aes(term, GranzymeB, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, GranzymeB, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(GranzymeB ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker GranzymeB for all timepoints",
xlab = "term") Granzyme B seems relatively high before vaccination. It then goes down after the prime vaccination, and then seems generally to go bak up in boost vaccination but only too similar levels to before prime. Generally there seems to be a slight increase in 3 out of 4 animals in the boost condition as compared to the prime condition. Granzyme B is highlighted as significant by LLMM and Palgen.
Looking into CD69
#median marker plotting
ggplot(df_median_fct2, aes(term, CD69, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CD69, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(CD69 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD69 for all timepoints",
xlab = "term")#combined plot
model <- gam(CD69 ~ s(numeric_term), data = df_samples_tfm2)
plot(df_median_fct2$numeric_term, df_median_fct2$CD69, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD69 for all timepoints",
xlab = "time", ylab = "CD69", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.1, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
Similar to CD16, it seems that CD69 has it’s peak increase one day after the boost and therefore was not seen as significant by the models used in this thesis.
Looking into CCR5
#median marker plotting
ggplot(df_median_fct2, aes(term, CCR5, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CCR5, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) + theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust=0)) ggsave(filename = "CCR5 facet per donor.jpg", width = 7, height = 4)
#spline regression
model <- gam(CCR5 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CCR5 for all timepoints",
xlab = "term") CCR5 starts off relatively high, has a decrease in the beginning of prime, rises back to original levels just before the prime and then decreases again. It seems that CCR5 is temporarily downregulated by each vaccination, without a lasting effect. Palgen et al.’s conclusion that CCR5 is upregulated in the boost condition does not seem to be the case and the findings of the LLMM model seem more realistic that it is generally downregulated. If the boost condition had been measured longer than just 3 days after the vaccination, the same pattern of quantities going back to baseline may have been observed. They probably found significant due to one outlier.
Looking into CD11c
#median marker plotting
ggplot(df_median_fct2, aes(term, CD11c, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CD11c, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(CD11c ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CD11c for all timepoints",
xlab = "term")model <- gam(CD11c ~ s(numeric_term), data = df_samples_tfm2)
plot(df_median_fct2$numeric_term, df_median_fct2$CD11c, col=df_median_fct2$grouped_term, main = "Spline regression and donor median markers of CD11c for all timepoints",
xlab = "time", ylab = "CD11c", xaxt="n")
axis(1, at=1:12, labels=FALSE)
text(seq(1, 12, by=1), par("usr")[3] - 0.1, labels = lablist, srt = 45, adj = c(1.1,1.1), pos = 1, xpd = TRUE, cex=.8)
par(new=TRUE)
plot(model, axes = FALSE, ann=FALSE)## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "axes" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "axes" is not
## a graphical parameter
Similarly to CD16 and CD69, it seems that the discrepancy found in Palgen et al.’s results and mine are because the strongest increase in CD11c occured one day after the boost vaccination. It seems to fluctuate around during prime, have a slight decrease just after the boost (3 and 6 hours after), and then rise rather rapidly one day after the boost. The scale here goes up to 0.9, slightly higher than some others, nowhere near as clear as CD66 that went up to 4.
Looking into CCR7
#median marker plotting
ggplot(df_median_fct2, aes(term, CCR7, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, vjust=0))#median marker plotting per donor
ggplot(df_median_fct2, aes(term, CCR7, color = grouped_term)) +
geom_jitter(width = 0.2, alpha = 0.5) +
facet_wrap(~donor) +
theme(axis.text.x = element_text(angle = 45, vjust=0)) #spline regression
model <- gam(CCR7 ~ s(numeric_term), data = df_samples_tfm2)
plot(model,
main = "Spline regression of protein marker CCR7 for all timepoints",
xlab = "term")